---
title: "Other Analysis"
output: html_document
---

```{r}

# set r markdown chunk defaults
knitr::opts_chunk$set(warning = F, message = F)

```

# Description

This file imports the cleaned YouGov data (created in 'cleaning.Rmd') and runs the analysis used to create Figure 4 in the manuscript.

# Setup

```{r}

# clearing memory --------------------------------------------------------------
rm(list = ls())

# set seed ---------------------------------------------------------------------
set.seed(45481928)

# load packages ----------------------------------------------------------------
library(here) # builds a path to the top level of your project file
library(tidyverse)
library(survey)
library(srvyr)
library(ggeffects)
library(stargazer)
library(broom)
library(ggstance) # for geom_pointrangeh() 
library(ggpubr)
library(arm)
library(gridExtra)
library(doParallel)
library(foreach)
library(broom)
library(margins)
library(patchwork)
library(relimp, pos = 4)
library(paran)
library(clarify)
library(viridis)
library(stringr)


# useful functions -------------------------------------------------------------

# function to calculate standard error of mean
std_error <- function(var){
  sd(var, na.rm = T) / sqrt(length(var[!is.na(var)]))
}

# function to manually bin continous variables
bin_fun <- function(x, bin_size){
  bin_size * ceiling(x / bin_size) - bin_size / 2
}

# function to put variables on a 0-1 scale
rescale_01 <- function(x, max){
  (x-1)/(max-1)
}

# function to compute two-way clustered standard errors with survey weights
source(here("code", "helper functions", "glmweave.R"))

# set n_sim
n_sim <- 1000

# plotting colors
cb_black <- "#000000" # Discernment / all headlines
cb_blue <- "#0072B2"  # Democratic headlines
cb_red<- "#D55E00"    # Republican headlines
cb_green = "#009E73"  # True headlines
cb_orange = "#E69F00" # False headlines

```

```{r}

# load cleaned data
long <- read.csv(here("data", "cleaned", "long.csv"))
yougov <- read.csv(here("data", "cleaned", "yougov.csv"))

# rescale pid variable
long$pid7_dem_sd <- as.numeric(scale(8-long$pid7))

```

# Figure 4

## Combined (All IVs in same model)

```{r, eval = T}

# create vector of IV names
iv_vars <- c("pid7_dem_sd", "share_accurate_sd", "chaos_mean_sd", "political_interest_sd", "age_sd", "female",
             "education_sd", "news_is_biased_sd", "trust_news_soc_sd", "fb_use_sd", "faminc_new_sd")

# create agreeable dataset
m1_combined_agreeable_data <- 
  long %>% filter(cond_int == "control") %>% filter(hdl_agreeable == 1)  %>% 
        dplyr::select(c(all_of(iv_vars), share_01, hdl_true, weight, hdl_id, caseid)) %>% 
        na.omit()

# create disagreeable dataset
m1_combined_disagreeable_data <- 
  long %>% filter(cond_int == "control") %>% filter(hdl_agreeable == 0)  %>% 
        dplyr::select(c(all_of(iv_vars), share_01, hdl_true, weight, hdl_id, caseid)) %>% 
        na.omit()

# run agreeable model
m1_combined_agreeable <- 
  glm(share_01 ~ hdl_true*(pid7_dem_sd + share_accurate_sd + chaos_mean_sd + political_interest_sd + age_sd +
                             education_sd + news_is_biased_sd + trust_news_soc_sd + fb_use_sd + faminc_new_sd + female), 
      family = "gaussian", weights = weight, data = m1_combined_agreeable_data)

# run disagreeable model
m1_combined_disagreeable <- 
  glm(share_01 ~ hdl_true*(pid7_dem_sd + share_accurate_sd + chaos_mean_sd + political_interest_sd + age_sd +
                             education_sd + news_is_biased_sd + trust_news_soc_sd + fb_use_sd + faminc_new_sd + female), 
      family = "gaussian", weights = weight, data = m1_combined_disagreeable_data)


# simulation based inference (agreeable model)

m1_combined_agreeable_out <- list()

for(i in 1:length(iv_vars)){
  
  tmp_sim <- 
    clarify::sim(m1_combined_agreeable, n = n_sim, 
                 vcov = xeffect.glm(glm.obj = m1_combined_agreeable, g1 = m1_combined_agreeable$data$caseid, g2 = m1_combined_agreeable$data$hdl_id))
  
  tmp_apply <- 
    sim_apply(tmp_sim, cl = 6,
              function(fit){
                true_high <- predict(fit, newdata = m1_combined_agreeable_data %>% mutate(hdl_true = 1, !!iv_vars[i] := 1), type = "response")
                true_low <- predict(fit, newdata = m1_combined_agreeable_data %>% mutate(hdl_true = 1, !!iv_vars[i] := 0), type = "response")
                false_high <- predict(fit, newdata = m1_combined_agreeable_data %>% mutate(hdl_true = 0, !!iv_vars[i] := 1), type = "response")
                false_low <- predict(fit, newdata = m1_combined_agreeable_data %>% mutate(hdl_true = 0, !!iv_vars[i] := 0), type = "response")
                
                c(true_high = mean(true_high), true_low = mean(true_low), 
                  false_high = mean(false_high), false_low = mean(false_low))})
  
  m1_combined_agreeable_out[[i]] <-  summary(transform(tmp_apply, 
                                                       `diff` = (true_high - false_high) - (true_low - false_low), 
                                                       `ratio` = (true_high / false_high) / (true_low / false_low)), 
                                             parm = c("diff", "ratio")) %>% 
    as.data.frame() %>%
    mutate(iv = iv_vars[i])
  
}

m1_combined_agreeable_out <- 
  m1_combined_agreeable_out %>% bind_rows() %>% rownames_to_column("quantity") %>%
  mutate(quantity = ifelse(str_detect(quantity, "diff"), "diff", "ratio"),
         hdl_agreeable = "agreeable")


# simulation based inference (disagreeable model)

m1_combined_disagreeable_out <- list()

for(i in 1:length(iv_vars)){
  
  tmp_sim <- 
    clarify::sim(m1_combined_disagreeable, n = n_sim, 
                 vcov = xeffect.glm(glm.obj = m1_combined_disagreeable, g1 = m1_combined_disagreeable$data$caseid, g2 = m1_combined_disagreeable$data$hdl_id))
  
  tmp_apply <- 
    sim_apply(tmp_sim, cl = 6,
              function(fit){
                true_high <- predict(fit, newdata = m1_combined_disagreeable_data %>% mutate(hdl_true = 1, !!iv_vars[i] := 1), type = "response")
                true_low <- predict(fit, newdata = m1_combined_disagreeable_data %>% mutate(hdl_true = 1, !!iv_vars[i] := 0), type = "response")
                false_high <- predict(fit, newdata = m1_combined_disagreeable_data %>% mutate(hdl_true = 0, !!iv_vars[i] := 1), type = "response")
                false_low <- predict(fit, newdata = m1_combined_disagreeable_data %>% mutate(hdl_true = 0, !!iv_vars[i] := 0), type = "response")
                
                c(true_high = mean(true_high), true_low = mean(true_low), 
                  false_high = mean(false_high), false_low = mean(false_low))})
  
  m1_combined_disagreeable_out[[i]] <-  summary(transform(tmp_apply, 
                                                          `diff` = (true_high - false_high) - (true_low - false_low), 
                                                          `ratio` = (true_high / false_high) / (true_low / false_low)), 
                                                parm = c("diff", "ratio")) %>% 
    as.data.frame() %>%
    mutate(iv = iv_vars[i])
  
}

m1_combined_disagreeable_out <- 
  m1_combined_disagreeable_out %>% bind_rows() %>% rownames_to_column("quantity") %>%
  mutate(quantity = ifelse(str_detect(quantity, "diff"), "diff", "ratio"), 
         hdl_agreeable = "disagreeable")

```

## Separate (All IVs in separate models)


```{r}

# iv vars vector without need for chaos, which I can't run two-way clustered SEs for in agreeable headline set
iv_vars_2_agreeable <- c("pid7_dem_sd", "share_accurate_sd", #"chaos_mean_sd",
               "political_interest_sd", "age_sd", "female",
               "education_sd", "news_is_biased_sd", "trust_news_soc_sd", "fb_use_sd", "faminc_new_sd")

iv_vars_2_disagreeable <- c("pid7_dem_sd", "share_accurate_sd", "chaos_mean_sd",
               "political_interest_sd", "age_sd", "female",
               "education_sd", "news_is_biased_sd", "trust_news_soc_sd", "fb_use_sd", "faminc_new_sd")

# create agreeable dataset
m2_combined_agreeable_data <- 
  long %>% filter(cond_int == "control") %>% filter(hdl_agreeable == 1)  %>% 
  dplyr::select(c(all_of(iv_vars_2_agreeable), share_01, hdl_true, weight, hdl_id, caseid)) %>% 
  na.omit()

# create disagreable dataset
m2_combined_disagreeable_data <- 
  long %>% filter(cond_int == "control") %>% filter(hdl_agreeable == 0)  %>% 
  dplyr::select(c(all_of(iv_vars_2_disagreeable), share_01, hdl_true, weight, hdl_id, caseid)) %>% 
  na.omit()

# create vector of variable names
model_vars <- c("share_01", "hdl_true",
                "weight", "caseid", "hdl_id", "hdl_agreeable")

# create nested agreeable dataset
m2_data_agreeable <- 
  long %>% filter(cond_int == "control") %>% filter(hdl_agreeable == 1)  %>% 
  dplyr::select(c(all_of(iv_vars_2_agreeable), all_of(model_vars))) %>% 

  gather(iv_key, iv_val, all_of(iv_vars_2_agreeable)) %>%
  dplyr::select(c(all_of(model_vars), iv_key, iv_val)) %>%
  na.omit() %>%
  group_by(iv_key) %>%
  nest()

# create nested disagreeable dataset
m2_data_disagreeable <- 
  long %>% filter(cond_int == "control") %>% filter(hdl_agreeable == 0)  %>% 
  dplyr::select(c(all_of(iv_vars_2_disagreeable), all_of(model_vars))) %>% 
  
  gather(iv_key, iv_val, all_of(iv_vars_2_disagreeable)) %>%
  dplyr::select(c(all_of(model_vars), iv_key, iv_val)) %>%
  na.omit() %>%
  group_by(iv_key) %>%
  nest()

# discerment modeling function
m2_discern_fun <- function(df){
glm(share_01 ~ hdl_true*iv_val,
    family = "gaussian", weights = weight, data = df)
}


# run agreeable models
m2_models_agreeable <- 
  m2_data_agreeable %>%
  mutate(model = map(data, m2_discern_fun))

# run disagreeable models
m2_models_disagreeable <- 
  m2_data_disagreeable %>%
  mutate(model = map(data, m2_discern_fun))


# simulation based inference (agreeable models)
m2_combined_agreeable_out <- list()

for(i in 1:length(iv_vars_2_agreeable)){

  tmp_sim <- 
    clarify::sim(m2_models_agreeable$model[[i]], n = n_sim, 
                 vcov = xeffect.glm(glm.obj = m2_models_agreeable$model[[i]], g1 = m2_models_agreeable$data[[i]]$caseid, g2 = m2_models_agreeable$data[[i]]$hdl_id))
  
  tmp_apply <- 
    sim_apply(tmp_sim, cl = 6,
              function(fit){
                true_high <- predict(fit, newdata = m2_data_agreeable$data[[i]] %>% mutate(hdl_true = 1, iv_val = 1), type = "response")
                true_low <- predict(fit, newdata = m2_data_agreeable$data[[i]] %>% mutate(hdl_true = 1, iv_val = 0), type = "response")
                false_high <- predict(fit, newdata = m2_data_agreeable$data[[i]] %>% mutate(hdl_true = 0, iv_val = 1), type = "response")
                false_low <- predict(fit, newdata = m2_data_agreeable$data[[i]] %>% mutate(hdl_true = 0, iv_val = 0), type = "response")
                
                c(true_high = mean(true_high), true_low = mean(true_low), 
                  false_high = mean(false_high), false_low = mean(false_low))})
  
  m2_combined_agreeable_out[[i]] <-  summary(transform(tmp_apply, 
                                                       `diff` = (true_high - false_high) - (true_low - false_low), 
                                                       `ratio` = (true_high / false_high) / (true_low / false_low)), 
                                             parm = c("diff", "ratio")) %>% 
    as.data.frame() %>%
    mutate(iv = iv_vars_2_agreeable[i])
  
}

m2_combined_agreeable_out <- 
  m2_combined_agreeable_out %>% bind_rows() %>% rownames_to_column("quantity") %>%
  mutate(quantity = ifelse(str_detect(quantity, "diff"), "diff", "ratio"),
         hdl_agreeable = "agreeable")


# simulation based inference (disagreeable models)

m2_combined_disagreeable_out <- list()

for(i in 1:length(iv_vars_2_disagreeable)){
  
  tmp_sim <- 
    clarify::sim(m2_models_disagreeable$model[[i]], n = n_sim, 
                 vcov = xeffect.glm(glm.obj = m2_models_disagreeable$model[[i]], g1 = m2_models_disagreeable$data[[i]]$caseid, g2 = m2_models_disagreeable$data[[i]]$hdl_id))
  
  tmp_apply <- 
    sim_apply(tmp_sim, cl = 6,
              function(fit){
                true_high <- predict(fit, newdata = m2_data_disagreeable$data[[i]] %>% mutate(hdl_true = 1, iv_val = 1), type = "response")
                true_low <- predict(fit, newdata = m2_data_disagreeable$data[[i]] %>% mutate(hdl_true = 1, iv_val = 0), type = "response")
                false_high <- predict(fit, newdata = m2_data_disagreeable$data[[i]] %>% mutate(hdl_true = 0, iv_val = 1), type = "response")
                false_low <- predict(fit, newdata = m2_data_disagreeable$data[[i]] %>% mutate(hdl_true = 0, iv_val = 0), type = "response")
                
                c(true_high = mean(true_high), true_low = mean(true_low), 
                  false_high = mean(false_high), false_low = mean(false_low))})
  
  m2_combined_disagreeable_out[[i]] <-  summary(transform(tmp_apply, 
                                                       `diff` = (true_high - false_high) - (true_low - false_low), 
                                                       `ratio` = (true_high / false_high) / (true_low / false_low)), 
                                             parm = c("diff", "ratio")) %>% 
    as.data.frame() %>%
    mutate(iv = iv_vars_2_disagreeable[i])
  
}

m2_combined_disagreeable_out <- 
  m2_combined_disagreeable_out %>% bind_rows() %>% rownames_to_column("quantity") %>%
  mutate(quantity = ifelse(str_detect(quantity, "diff"), "diff", "ratio"),
         hdl_agreeable = "disagreeable")

```

## Run Separate Agreeable NFC Model with One-Way Clustered SEs

Throughout our analysis, we use the simulation-based approach to calculating two-way clustered standard errors with survey weights (described in the manuscript). The weighted variance-covariance matrix for one of the models---the model predicting sharing discernment among agreeable headlines with need for chaos--- was not compatible with the simulation-based inference approach for calculating two-way clustered standard errors, so we calculated one-way clustered standard errors instead, using the simulation-based inference approach. The code immediately below runs this analysis. 

An alternative to simulation-based inference for calculating multiplicative discernment is a quasi-poisson model. In the second code chunk below, we run the model described above with two-way clustered standard errors using a quasi-poisson model. The confidence intervals are almost identical, indicating that the one-way and two-way clustered standard errors are almost identical in size. 


```{r}

# iv vars vector without need for chaos, which I can't run two-way clustered SEs for in agreeable headline set
iv_vars_3 <- c("chaos_mean_sd")

# create agreeable dataset
m3_combined_agreeable_data <- 
  long %>% filter(cond_int == "control") %>% filter(hdl_agreeable == 1)  %>% 
  dplyr::select(c(all_of(iv_vars_3), share_01, hdl_true, weight, hdl_id, caseid)) %>% 
  na.omit()

# create vector of variable names
model_vars <- c("share_01", "hdl_true",
                "weight", "caseid", "hdl_id", "hdl_agreeable")

# create nested agreeable dataset
m3_data_agreeable <- 
  long %>% filter(cond_int == "control") %>% filter(hdl_agreeable == 1)  %>% 
  dplyr::select(c(all_of(iv_vars_3), all_of(model_vars))) %>% 
  
  gather(iv_key, iv_val, all_of(iv_vars_3)) %>%
  dplyr::select(c(all_of(model_vars), iv_key, iv_val)) %>%
  na.omit() %>%
  group_by(iv_key) %>%
  nest()

# discernment modeling function
m3_discern_fun <- function(df){
  svyglm(share_01 ~ hdl_true*iv_val,
         family = "gaussian", design = svydesign(ids = ~caseid, weights = ~weight, data = df))
}

# run agreeable models
m3_models_agreeable <- 
  m3_data_agreeable %>%
  mutate(model = map(data, m3_discern_fun))

# simulation-based inference (agreeable)
m3_combined_agreeable_out <- list()

for(i in 1:length(iv_vars_3)){
  
  tmp_sim <- 
    clarify::sim(m3_models_agreeable$model[[i]], n = n_sim)
  
  tmp_apply <- 
    sim_apply(tmp_sim, cl = 6,
              function(fit){
                true_high <- predict(fit, newdata = m3_data_agreeable$data[[i]] %>% mutate(hdl_true = 1, iv_val = 1), type = "response")
                true_low <- predict(fit, newdata = m3_data_agreeable$data[[i]] %>% mutate(hdl_true = 1, iv_val = 0), type = "response")
                false_high <- predict(fit, newdata = m3_data_agreeable$data[[i]] %>% mutate(hdl_true = 0, iv_val = 1), type = "response")
                false_low <- predict(fit, newdata = m3_data_agreeable$data[[i]] %>% mutate(hdl_true = 0, iv_val = 0), type = "response")
                
                c(true_high = mean(true_high), true_low = mean(true_low), 
                  false_high = mean(false_high), false_low = mean(false_low))})
  
  m3_combined_agreeable_out[[i]] <-  summary(transform(tmp_apply, 
                                                       `diff` = (true_high - false_high) - (true_low - false_low), 
                                                       `ratio` = (true_high / false_high) / (true_low / false_low)), 
                                             parm = c("diff", "ratio")) %>% 
    as.data.frame() %>%
    mutate(iv = iv_vars_3[i])
  
}

m3_combined_agreeable_out <- 
  m3_combined_agreeable_out %>% bind_rows() %>% rownames_to_column("quantity") %>%
  mutate(quantity = ifelse(str_detect(quantity, "diff"), "diff", "ratio"),
         hdl_agreeable = "agreeable")

```

```{r}

# wrapper function to output results from glmweave()
source(here("code", "helper functions", "xeffect_summary.R"))

# iv vars vector without need for chaos, which I can't run two-way clustered SEs for in agreeable headline set
iv_vars_3 <- c("pid7_dem_sd", "share_accurate_sd", "chaos_mean_sd",
               "political_interest_sd", "age_sd", "female",
               "education_sd", "news_is_biased_sd", "trust_news_soc_sd", "fb_use_sd", "faminc_new_sd")

# create vector of variable names
model_vars <- c("share_01", "hdl_true",
                "weight", "caseid", "hdl_id", "hdl_agreeable")

# create dataset
m2_data_agreeable_nfc_only <- 
  long %>% filter(cond_int == "control") %>% filter(hdl_agreeable == 1)  %>% 
  dplyr::select(c(all_of(iv_vars_3), all_of(model_vars))) %>% 
  
  gather(iv_key, iv_val, all_of(iv_vars_3)) %>%
  dplyr::select(c(all_of(model_vars), iv_key, iv_val)) %>%
  na.omit() %>%
  group_by(iv_key) %>%
  filter(iv_key == "chaos_mean_sd")

# quasipoisson with weighted two-way clustered SEs
xeffect_summary(
    glm(share_01 ~ hdl_true*iv_val,
        family = "quasipoisson", weights = weight, data = m2_data_agreeable_nfc_only))

# exponentiated interaction coefficients provide estimate of multiplicative discernment
exp(-0.0887) #  0.9151201 (beta)
exp(-0.0887 - 1.96*0.0130) #0.8920974 (95% CI, low)
exp(-0.0887 + 1.96*0.0130) #0.9387369 (95% CI, high)

# note: these are nearly identical to the one-way clustered SEs reported in paper
m3_combined_agreeable_out[2,] # beta = .898, 95% CI, low = .862, 95% CI, high = 0.939

```

## Plot

```{r}


fig4 <- 
  
rbind(m1_combined_agreeable_out %>% mutate(model_type = "combined"), 
      m1_combined_disagreeable_out %>% mutate(model_type = "combined"), 
      m2_combined_agreeable_out %>% mutate(model_type = "separate"), 
      m2_combined_disagreeable_out %>% mutate(model_type = "separate"),
      m3_combined_agreeable_out %>% mutate(model_type = "separate")) %>%
  
  filter(quantity == "ratio") %>%

  mutate(iv = factor(iv, levels = c("chaos_mean_sd", "trust_news_soc_sd", "news_is_biased_sd", "fb_use_sd", "female",
                                     "faminc_new_sd", "political_interest_sd", "age_sd", "share_accurate_sd",
                                     "education_sd", "pid7_dem_sd"))) %>%
  mutate(hdl_agreeable = ifelse(hdl_agreeable == "agreeable", "Agreeable",
                       ifelse(hdl_agreeable == "disagreeable", "Disagreeable", NA))) %>%
  mutate(iv = recode(iv,
                      female = "Female",
                      share_accurate_sd = "Accuracy Important (sd)",
                      chaos_mean_sd = "Need for Chaos (sd)",
                      pid7_dem_sd = "Party Identification (sd) \n[higher values = Democrat]",
                      fb_use_sd = "Facebook Usage Frequency (sd)",
                      trust_news_soc_sd = "Trust News on Social Media (sd)",
                      education_sd = "Education (sd)",
                      age_sd = "Age (sd)",
                      political_interest_sd = "Political Interest (sd)",
                      faminc_new_sd = "Family Income (sd)",
                      news_is_biased_sd = "News is Biased (sd)")) %>%
  mutate(model_type = factor(model_type, levels = rev(c("separate", "combined")))) %>%
  
  ggplot(aes(x = Estimate, 
             xmin = `2.5 %`, 
             xmax = `97.5 %`, 
             y = iv, 
             shape = model_type,
             col = model_type)) + 
  geom_vline(xintercept = 1) + 
  geom_pointrangeh(position = position_dodge(.3)) + 
  facet_wrap(vars(hdl_agreeable)) +
  scale_shape_manual(name = "Model Type:",
                     breaks = c("separate", "combined"), 
                     labels = c("Separate", "Combined"),
                     values = c(16, 15)) + 
  scale_color_manual(name = "Model Type:",
                     breaks = c("separate", "combined"), 
                     labels = c("Separate", "Combined"),
                     values = c("darkgray", "black")) + 
  theme_bw() + 
  theme(legend.position = "bottom") + 
  ylab("")

ggsave(fig4, width = 7, height = 5, filename = here("figures", "main", "fig4.pdf"))

```


